Optimize memcpy for x86 arch. If the source buffer does not start at a 64-bit boundary, ...

author Keir Fraser <keir.fraser@citrix.com>

Wed, 7 Oct 2009 06:45:39 +0000 (07:45 +0100)

committer Keir Fraser <keir.fraser@citrix.com>

Wed, 7 Oct 2009 06:45:39 +0000 (07:45 +0100)
author Keir Fraser <keir.fraser@citrix.com>
Wed, 7 Oct 2009 06:45:39 +0000 (07:45 +0100)
committer Keir Fraser <keir.fraser@citrix.com>
Wed, 7 Oct 2009 06:45:39 +0000 (07:45 +0100)
diff --git a/xen/include/asm-x86/string.h b/xen/include/asm-x86/string.h

index 496b22754fccf8742f95077ad130641b4cb02c8d..c3481216eedac1978ae312e473e0d0379b345320 100644 (file)
--- a/xen/include/asm-x86/string.h
+++ b/xen/include/asm-x86/string.h
@@ -96,13 +96,29 @@ static always_inline void * __constant_memcpy(
  }
  
  #define __HAVE_ARCH_MEMCPY
+/*
+ * Copy n bytes from f to t, aligning the source to a 64-bit boundary.
+ *
+ * When f is not 8-byte aligned and n is greater than 32, the copy is split
+ * in two: the leading bytes up to the next 8-byte boundary of the source are
+ * copied first, then the remainder is copied starting from that aligned
+ * source address. In every other case the whole copy is passed to
+ * __variable_memcpy() in a single call.
+ *
+ * Only the source address is considered; the destination's alignment is not
+ * examined here.
+ *
+ * Returns t in the split case, otherwise whatever __variable_memcpy()
+ * returns (presumably also t -- confirm against its definition).
+ */
+static always_inline
+void *__var_memcpy(void *t, const void *f, size_t n)
+{
+    int off = (unsigned long)f & 0x7; /* source misalignment within 8 bytes, 0..7 */
+    /* Split the copy only when the source is misaligned (off != 0) and the
+     * size exceeds 32 bytes; smaller copies go straight through. */
+    if ( (n > 32) && off ) {
+        size_t n1 = 8 - off; /* bytes needed to reach the next 8-byte boundary (1..7) */
+        __variable_memcpy(t, f, n1); /* unaligned head */
+        __variable_memcpy(t + n1, f + n1, n - n1); /* rest; f + n1 is 8-byte aligned */
+        return t;
+    } else {
+            return (__variable_memcpy(t, f, n));
+    }
+}
+
  #define memcpy(t,f,n) (__memcpy((t),(f),(n)))
  static always_inline
  void *__memcpy(void *t, const void *f, size_t n)
  {
      return (__builtin_constant_p(n) ?
              __constant_memcpy((t),(f),(n)) :
-            __variable_memcpy((t),(f),(n)));
+            __var_memcpy((t),(f),(n)));
  }
  
  /* Some version of gcc don't have this builtin. It's non-critical anyway. */
author	Keir Fraser <keir.fraser@citrix.com>
	Wed, 7 Oct 2009 06:45:39 +0000 (07:45 +0100)
committer	Keir Fraser <keir.fraser@citrix.com>
	Wed, 7 Oct 2009 06:45:39 +0000 (07:45 +0100)